def extract_pairs(lines):
    """Collect the two lines that follow every ``'E\\n'`` separator line.

    Args:
        lines: Raw lines of the input file, as returned by ``readlines()``
            (each normally ending in a newline).

    Returns:
        A list of ``[first_line, second_line]`` lists, one per separator
        that is followed by at least two more lines. The lines are returned
        untouched (markers and newlines still attached).
    """
    pairs = []
    last_usable = len(lines) - 2  # a separator needs two lines after it
    for idx, line in enumerate(lines):
        # A separator too close to the end of the file has no complete
        # pair behind it; skip it instead of raising IndexError.
        if line == 'E\n' and idx < last_usable:
            pairs.append([lines[idx + 1], lines[idx + 2]])
    return pairs


def format_pair(question_line, answer_line, marker='M '):
    """Turn one raw question/answer line pair into a ``question|answer`` record.

    Only a *leading* ``marker`` is removed from each line, so text that
    happens to contain the marker sequence elsewhere is left intact.

    Args:
        question_line: Raw line holding the question.
        answer_line: Raw line holding the answer.
        marker: Prefix to drop from the start of each line.

    Returns:
        ``'<question>|<answer>\\n'`` -- always terminated by exactly one
        newline, even when ``answer_line`` was the last line of the file
        and had none.
    """
    def _clean(raw):
        if raw.startswith(marker):
            raw = raw[len(marker):]
        return raw.rstrip('\n')

    return _clean(question_line) + '|' + _clean(answer_line) + '\n'


if __name__ == '__main__':
    # Convert the 'E'-separated dialogue file into one "question|answer"
    # record per line.
    path = 'data/xiaohuangji/xiaohuangji50w_nofenci.txt'

    # Context managers guarantee both files are closed (and the output is
    # flushed) even if something fails part-way through.
    with open(path, 'r', encoding='utf8') as dataFile:
        qaList = dataFile.readlines()
    print(qaList)

    total = extract_pairs(qaList)
    print(total)

    with open('data/xiaohuangji/my_formatted_movie_lines.txt', 'w', encoding='utf8') as resultData:
        resultData.writelines(
            format_pair(question, answer) for question, answer in total
        )





